# Reset the workspace, keeping only `drop_attn_fails` and `n_boot` if they
# are present.
rm(list = setdiff(ls(), c("drop_attn_fails", "n_boot")))
library(tidyverse)

# Sample variants to check; the loop below runs once per entry.
types <- c(
  "Full sample",
  "Sample without attention fails",
  "Weighted"
)

for(j in 1:length(types)){

  # Reload the survey data on every pass; the filters below overwrite
  # `uk`, `us` and `de` (presumably provided by this file - TODO confirm).
  load("../working/survey_data_for_quota_checks.Rdata")

  # Add a numeric completion timestamp and sort respondents by it.
  uk <- arrange(mutate(uk, time = as.numeric(as.POSIXct(EndDate))), time)
  us <- arrange(mutate(us, time = as.numeric(as.POSIXct(EndDate))), time)
  de <- arrange(mutate(de, time = as.numeric(as.POSIXct(EndDate))), time)

  # Drop attention check failures for both filtered variants: only
  # respondents who gave the expected answer to `pers_rep1_5` are kept.
  if (types[j] %in% c("Weighted", "Sample without attention fails")) {
    uk <- filter(uk, pers_rep1_5 == "Agree")
    us <- filter(us, pers_rep1_5 == "Agree")
    de <- filter(de, pers_rep1_5 == "Stimme zu")
  }

  # Unweighted variants: every respondent counts once. The "Weighted" variant
  # is left untouched here, so it assumes the loaded data already carry a
  # `wgt` column - TODO confirm.
  if (types[j] %in% c("Sample without attention fails", "Full sample")) {
    uk$wgt <- 1
    de$wgt <- 1
    us$wgt <- 1
  }

# Check distributions against quota targets

# UK ----
# Each table below sums `wgt` within a cell, converts the sums to shares, and
# then joins the matching sheet of ../data/uk.xlsx. No `by` is given, so the
# join uses every column name the two tables share.

# Age x gender cells.
uk_age_gender <- uk %>%
  group_by(Gender, age_cat) %>%
  summarise(sample_n = sum(wgt, na.rm = TRUE)) %>%
  ungroup() %>%
  mutate(
    sample_prop = sample_n / sum(sample_n, na.rm = TRUE),
    Age = age_cat,
    category = paste0(Gender, " ", age_cat),
    country = "UK",
    type = "age_gender"
  ) %>%
  full_join(readxl::read_excel("../data/uk.xlsx", sheet = "age_gender"))

# Regions.
uk_region <- uk %>%
  group_by(region) %>%
  summarise(sample_n = sum(wgt, na.rm = TRUE)) %>%
  ungroup() %>%
  mutate(
    sample_prop = sample_n / sum(sample_n, na.rm = TRUE),
    category = region,
    country = "UK",
    type = "region"
  ) %>%
  full_join(readxl::read_excel("../data/uk.xlsx", sheet = "region"))

# Education: several levels are folded into "Level 1" before counting,
# presumably to match the coarser "educ_simple" sheet - TODO confirm.
# NOTE(review): "No qual" is listed twice in the mapping; the repeat looks
# redundant - check whether a different level was intended.
uk_education <- uk %>%
  mutate(
    educ2 = recode_factor(
      educ,
      "No qual" = "Level 1",
      "No qual" = "Level 1",
      "Level 2" = "Level 1",
      "Other" = "Level 1"
    )
  ) %>%
  group_by(educ2) %>%
  summarise(sample_n = sum(wgt, na.rm = TRUE)) %>%
  ungroup() %>%
  mutate(
    sample_prop = sample_n / sum(sample_n, na.rm = TRUE),
    category = educ2,
    country = "UK",
    type = "educ"
  ) %>%
  full_join(readxl::read_excel("../data/uk.xlsx", sheet = "educ_simple"))


# US ----
# Each table below sums `wgt` within a cell, converts the sums to shares, and
# then joins the matching sheet of ../data/us.xlsx. No `by` is given, so the
# join uses every column name the two tables share.

# Age x gender cells.
us_age_gender <- us %>%
  group_by(Gender, age_cat) %>%
  summarise(sample_n = sum(wgt, na.rm = TRUE)) %>%
  ungroup() %>%
  mutate(
    sample_prop = sample_n / sum(sample_n, na.rm = TRUE),
    Age = age_cat,
    category = paste0(Gender, " ", age_cat),
    country = "US",
    type = "age_gender"
  ) %>%
  full_join(readxl::read_excel("../data/us.xlsx", sheet = "age_gender"))

# Regions.
us_region <- us %>%
  group_by(region) %>%
  summarise(sample_n = sum(wgt, na.rm = TRUE)) %>%
  ungroup() %>%
  mutate(
    sample_prop = sample_n / sum(sample_n, na.rm = TRUE),
    category = region,
    country = "US",
    type = "region"
  ) %>%
  full_join(readxl::read_excel("../data/us.xlsx", sheet = "region"))

# Education levels, used as recorded in `educ`.
us_education <- us %>%
  group_by(educ) %>%
  summarise(sample_n = sum(wgt, na.rm = TRUE)) %>%
  ungroup() %>%
  mutate(
    sample_prop = sample_n / sum(sample_n, na.rm = TRUE),
    category = educ,
    country = "US",
    type = "educ"
  ) %>%
  full_join(readxl::read_excel("../data/us.xlsx", sheet = "educ"))

# DE ----
# Each table below sums `wgt` within a cell, converts the sums to shares, and
# then joins the matching sheet of ../data/de.xlsx. No `by` is given, so the
# join uses every column name the two tables share.

# Age x gender cells; the two oldest age bands are merged into "55+" first.
de_age_gender <- de %>%
  mutate(
    age_cat = recode_factor(
      age_cat,
      "55-64" = "55+",
      "65+" = "55+"
    )
  ) %>%
  group_by(Gender, age_cat) %>%
  summarise(sample_n = sum(wgt, na.rm = TRUE)) %>%
  ungroup() %>%
  mutate(
    sample_prop = sample_n / sum(sample_n, na.rm = TRUE),
    Age = age_cat,
    category = paste0(Gender, " ", age_cat),
    country = "DE",
    type = "age_gender"
  ) %>%
  full_join(readxl::read_excel("../data/de.xlsx", sheet = "age_gender"))

# Regions.
de_region <- de %>%
  group_by(region) %>%
  summarise(sample_n = sum(wgt, na.rm = TRUE)) %>%
  ungroup() %>%
  mutate(
    sample_prop = sample_n / sum(sample_n, na.rm = TRUE),
    category = region,
    country = "DE",
    type = "region"
  ) %>%
  full_join(readxl::read_excel("../data/de.xlsx", sheet = "region"))

# Education levels, used as recorded in `educ`.
de_education <- de %>%
  group_by(educ) %>%
  summarise(sample_n = sum(wgt, na.rm = TRUE)) %>%
  ungroup() %>%
  mutate(
    sample_prop = sample_n / sum(sample_n, na.rm = TRUE),
    category = educ,
    country = "DE",
    type = "educ"
  ) %>%
  full_join(readxl::read_excel("../data/de.xlsx", sheet = "educ"))


## Plot distributions

# Stack the nine sample-vs-target tables, keep only the columns used for
# plotting, and compute the gap between sample and target shares in
# percentage points (`diff`).
quotas <- list(
  de_age_gender, uk_age_gender, us_age_gender,
  de_region, uk_region, us_region,
  de_education, uk_education, us_education
) %>%
  bind_rows() %>%
  select(
    country, type, category,
    sample_n, sample_prop,
    target_n, target_prop
  ) %>%
  mutate(diff = (sample_prop * 100) - (target_prop * 100))


# One dot plot per country: sample-minus-target gap for every quota category,
# with one facet row per quota type.
countries <- c("US", "UK", "DE")
plot_list <- list()
for (i in seq_along(countries)) {

  # Number of respondent rows in this country's data, shown in the title.
  country_n <- switch(countries[i],
    DE = nrow(de),
    UK = nrow(uk),
    US = nrow(us)
  )

  # NOTE(review): xlim() discards points outside +/-6.4, so a larger gap
  # would not be drawn - confirm this range is wide enough.
  plot_list[[i]] <- ggplot(
    filter(quotas, country == countries[i]),
    aes(x = diff, y = category)
  ) +
    geom_point() +
    geom_vline(xintercept = 0, linetype = 2) +
    facet_grid(type ~ country, scales = "free_y") +
    xlim(c(-6.4, 6.4)) +
    xlab("Sample proportion - target proportion") +
    ylab("") +
    ggtitle(paste0(types[j], "\n", "n = ", country_n)) +
    theme_bw()
}



# Choose the output file name for the current sample type. Anything that is
# neither "Full sample" nor "Weighted" gets the attention-fails-removed name.
if (types[j] == "Full sample") {
  title <- "sample_quota_targets_full_sample.pdf"
} else if (types[j] == "Weighted") {
  title <- "sample_quota_targets_weighted.pdf"
} else {
  title <- "sample_quota_targets_attention_fails_removed.pdf"
}

# Draw the three country plots side by side on one 15 x 6 inch PDF page.
# The device is closed in `finally`, so a failure while drawing does not
# leave an open graphics device behind.
pdf(paste0("../output/plots/", title), width = 15, height = 6)
tryCatch(
  gridExtra::grid.arrange(
    plot_list[[1]],
    plot_list[[2]],
    plot_list[[3]],
    ncol = 3
  ),
  finally = dev.off()
)
}
